Detailed Survival analyis of the Survival lung data.
Libraries
library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('keep.trailing.zeros',TRUE)
Libraries
data(lung)
## Warning in data(lung): data set 'lung' not found
lung$inst <- NULL
lung$status <- lung$status - 1
lung <- lung[complete.cases(lung),]
pander::pander(table(lung$status))
pander::pander(summary(lung$time))
Exploring Raw Features with RRPlot
convar <- colnames(lung)[lapply(apply(lung,2,unique),length) > 10]
convar <- convar[convar != "time"]
topvar <- univariate_BinEnsemble(lung[,c("status",convar)],"status")
pander::pander(topvar)
topv <- min(5,length(topvar))
topFive <- names(topvar)[1:topv]
RRanalysis <- list();
idx <- 1
for (topf in topFive)
{
RRanalysis[[idx]] <- RRPlot(cbind(lung$status,lung[,topf]),
atRate=c(0.90),
timetoEvent=lung$time,
title=topf,
# plotRR=FALSE
)
idx <- idx + 1
}






names(RRanalysis) <- topFive
Reporting the Metrics
ROCAUC <- NULL
CstatCI <- NULL
LogRangp <- NULL
Sensitivity <- NULL
Specificity <- NULL
for (topf in topFive)
{
CstatCI <- rbind(CstatCI,RRanalysis[[topf]]$c.index$cstatCI)
LogRangp <- rbind(LogRangp,RRanalysis[[topf]]$surdif$pvalue)
Sensitivity <- rbind(Sensitivity,RRanalysis[[topf]]$ROCAnalysis$sensitivity)
Specificity <- rbind(Specificity,RRanalysis[[topf]]$ROCAnalysis$specificity)
ROCAUC <- rbind(ROCAUC,RRanalysis[[topf]]$ROCAnalysis$aucs)
}
rownames(CstatCI) <- topFive
rownames(LogRangp) <- topFive
rownames(Sensitivity) <- topFive
rownames(Specificity) <- topFive
rownames(ROCAUC) <- topFive
pander::pander(ROCAUC)
| age |
0.59 |
0.492 |
0.687 |
| wt.loss |
0.56 |
0.461 |
0.658 |
pander::pander(CstatCI)
| age |
0.558 |
0.557 |
0.498 |
0.617 |
| wt.loss |
0.514 |
0.514 |
0.450 |
0.569 |
pander::pander(LogRangp)
pander::pander(Sensitivity)
| age |
0.1157 |
0.0647 |
0.187 |
| wt.loss |
0.0496 |
0.0184 |
0.105 |
pander::pander(Specificity)
| age |
0.872 |
0.743 |
0.952 |
| wt.loss |
0.894 |
0.769 |
0.965 |
meanMatrix <- cbind(ROCAUC[,1],CstatCI[,1],Sensitivity[,1],Specificity[,1])
colnames(meanMatrix) <- c("ROCAUC","C-Stat","Sen","Spe")
pander::pander(meanMatrix)
| age |
0.59 |
0.558 |
0.1157 |
0.872 |
| wt.loss |
0.56 |
0.514 |
0.0496 |
0.894 |
Modeling
ml <- BSWiMS.model(Surv(time,status)~1,data=lung,NumberofRepeats = 10)
[+++++++++++++++++++-++++++++]..
sm <- summary(ml)
pander::pander(sm$coefficients)
Table continues below
| ph.ecog |
4.32e-01 |
1.194 |
1.541 |
1.988 |
0.679 |
0.649 |
| sex |
-4.59e-01 |
0.456 |
0.632 |
0.876 |
0.649 |
0.679 |
| pat.karno |
-1.77e-03 |
0.997 |
0.998 |
1.000 |
0.506 |
0.720 |
| ph.karno |
-2.90e-07 |
1.000 |
1.000 |
1.000 |
0.577 |
0.720 |
| age |
9.13e-08 |
1.000 |
1.000 |
1.000 |
0.565 |
0.720 |
Table continues below
| ph.ecog |
0.601 |
0.601 |
0.620 |
0.600 |
0.0449 |
0.405 |
| sex |
0.601 |
0.620 |
0.601 |
0.600 |
0.0285 |
0.478 |
| pat.karno |
0.506 |
0.585 |
0.500 |
0.585 |
0.0292 |
0.342 |
| ph.karno |
0.577 |
0.570 |
0.500 |
0.570 |
0.0143 |
0.280 |
| age |
0.565 |
0.549 |
0.500 |
0.549 |
0.0162 |
0.195 |
| ph.ecog |
3.33 |
2.48 |
-0.02005 |
1.0 |
| sex |
2.76 |
2.85 |
-0.00167 |
1.0 |
| pat.karno |
2.44 |
2.24 |
0.08546 |
1.0 |
| ph.karno |
2.22 |
1.64 |
0.06998 |
0.5 |
| age |
1.97 |
1.14 |
0.04871 |
0.2 |
Expected time to event
toinclude <- rdata[,1]==1
obstiemToEvent <- lung$time
tmin<-min(obstiemToEvent)
sum(toinclude)
[1] 121
timetoEvent <- meanTimeToEvent(rdata[,2],timeinterval)
tmax<-max(c(obstiemToEvent,timetoEvent))
lmfit <- lm(obstiemToEvent[toinclude]~0+timetoEvent[toinclude])
sm <- summary(lmfit)
pander::pander(sm)
| timetoEvent[toinclude] |
0.808 |
0.0509 |
15.9 |
2.92e-31 |
Fitting linear model: obstiemToEvent[toinclude] ~ 0 +
timetoEvent[toinclude]
| 121 |
200 |
0.677 |
0.675 |
plot(timetoEvent,obstiemToEvent,
col=1+rdata[,1],
xlab="Expected time",
ylab="Observed time",
main="Expected vs. Observed",
xlim=c(tmin,tmax),
ylim=c(tmin,tmax),
log="xy")
lines(x=c(tmin,tmax),y=lmfit$coefficients*c(tmin,tmax),lty=1,col="blue")
txt <- bquote(paste(R^2 == .(round(sm$r.squared,3))))
text(tmin+0.005*(tmax-tmin),tmax,txt,cex=0.7)
text(tmin+0.015*(tmax-tmin),tmax,sprintf("Slope=%4.3f",sm$coefficients[1]),cex=0.7)
legend("bottomright",legend=c("No Event","Event","Linear fit"),
pch=c(1,1,-1),
col=c(1,2,"blue"),
lty=c(-1,-1,1)
)

MADerror2 <- mean(abs(timetoEvent[toinclude]-obstiemToEvent[toinclude]))
pander::pander(MADerror2)
163
Cox Calibration
op <- par(no.readonly = TRUE)
calprob <- CoxRiskCalibration(ml,lung,"status","time")
pander::pander(c(h0=calprob$h0,
Gain=calprob$hazardGain,
DeltaTime=calprob$timeInterval),
caption="Cox Calibration Parameters")
Expected time to event
timetoEvent <- meanTimeToEvent(rdata[,2],timeinterval)
tmax<-max(c(obstiemToEvent,timetoEvent))
lmfit <- lm(obstiemToEvent[toinclude]~0+timetoEvent[toinclude])
sm <- summary(lmfit)
pander::pander(sm)
| timetoEvent[toinclude] |
0.947 |
0.0597 |
15.9 |
2.92e-31 |
Fitting linear model: obstiemToEvent[toinclude] ~ 0 +
timetoEvent[toinclude]
| 121 |
200 |
0.677 |
0.675 |
plot(timetoEvent,obstiemToEvent,
col=1+rdata[,1],
xlab="Expected time",
ylab="Observed time",
main="Expected vs. Observed",
xlim=c(tmin,tmax),
ylim=c(tmin,tmax),
log="xy")
lines(x=c(tmin,tmax),y=lmfit$coefficients*c(tmin,tmax),lty=1,col="blue")
txt <- bquote(paste(R^2 == .(round(sm$r.squared,3))))
text(tmin+0.005*(tmax-tmin),tmax,txt,cex=0.7)
text(tmin+0.015*(tmax-tmin),tmax,sprintf("Slope=%4.3f",sm$coefficients[1]),cex=0.7)
legend("bottomright",legend=c("No Event","Event","Linear fit"),
pch=c(1,1,-1),
col=c(1,2,"blue"),
lty=c(-1,-1,1)
)

MADerror2 <-c(MADerror2,mean(abs(timetoEvent-obstiemToEvent)))
pander::pander(MADerror2)
163 and 169
Cross-Validation
rcv <- randomCV(theData=lung,
theOutcome = Surv(time,status)~1,
fittingFunction=BSWiMS.model,
trainFraction = 0.95,
repetitions=200,
classSamplingType = "Pro"
)
.[++].[+++].[+++].[+].[+++].[+].[++].[+++].[+++].[+++]10 Tested: 84
Avg. Selected: 3.4 Min Tests: 1 Max Tests: 3 Mean Tests: 1.190476 . MAD:
0.4813504
.[+++].[+++].[+++].[++].[++-].[+++].[+++].[+++].[+++].[++]20 Tested:
117 Avg. Selected: 3.55 Min Tests: 1 Max Tests: 5 Mean Tests: 1.709402 .
MAD: 0.4775247
.[+++].[+++].[+++].[+].[++].[+++].[++++].[++].[++].[++]30 Tested: 145
Avg. Selected: 3.533333 Min Tests: 1 Max Tests: 7 Mean Tests: 2.068966 .
MAD: 0.474251
.[+++].[+++].[++].[+++].[+++].[++].[+++].[+].[++].[+++]40 Tested: 158
Avg. Selected: 3.525 Min Tests: 1 Max Tests: 8 Mean Tests: 2.531646 .
MAD: 0.4758661
.[+].[+++].[+++].[++].[+++].[+++].[+++].[+++].[+++].[+++]50 Tested:
162 Avg. Selected: 3.56 Min Tests: 1 Max Tests: 8 Mean Tests: 3.08642 .
MAD: 0.4761832
.[+++].[++].[++++].[+++].[+++].[+++].[+++].[+++].[+++].[+++]60
Tested: 164 Avg. Selected: 3.633333 Min Tests: 1 Max Tests: 9 Mean
Tests: 3.658537 . MAD: 0.4765342
.[+++].[+++].[+++].[+++].[+++].[+++].[++].[+++].[++].[++]70 Tested:
166 Avg. Selected: 3.642857 Min Tests: 1 Max Tests: 11 Mean Tests:
4.216867 . MAD: 0.4760812
.[++].[++].[+++].[+++].[+++].[+++-].[++].[++++].[++].[++]80 Tested:
167 Avg. Selected: 3.6375 Min Tests: 1 Max Tests: 12 Mean Tests:
4.790419 . MAD: 0.4751644
.[++].[++].[+++].[++].[+++].[+++].[+++].[++].[+++].[+++]90 Tested:
167 Avg. Selected: 3.633333 Min Tests: 1 Max Tests: 12 Mean Tests:
5.389222 . MAD: 0.4751534
.[++-].[++-].[++-].[++].[+++].[+++].[+++].[++].[++].[+++]100 Tested:
167 Avg. Selected: 3.61 Min Tests: 1 Max Tests: 14 Mean Tests: 5.988024
. MAD: 0.475543
.[+].[+++].[++].[++].[+++].[++].[++-].[++].[++].[++-]110 Tested: 167
Avg. Selected: 3.563636 Min Tests: 1 Max Tests: 16 Mean Tests: 6.586826
. MAD: 0.4753595
.[+++].[+++].[+++].[+++].[+++].[++++].[+++].[++].[+++].[+++]120
Tested: 167 Avg. Selected: 3.6 Min Tests: 1 Max Tests: 16 Mean Tests:
7.185629 . MAD: 0.475331
.[+].[++].[++].[++].[++].[+++].[+++].[+++].[+++].[+++]130 Tested: 167
Avg. Selected: 3.584615 Min Tests: 1 Max Tests: 18 Mean Tests: 7.784431
. MAD: 0.4753503
.[++++].[+++].[+].[+].[+].[+].[+++].[+++].[+++].[+++]140 Tested: 167
Avg. Selected: 3.557143 Min Tests: 1 Max Tests: 18 Mean Tests: 8.383234
. MAD: 0.4754759
.[+++].[+++].[+++].[+++].[+++].[+++].[++++].[++].[++].[++]150 Tested:
168 Avg. Selected: 3.573333 Min Tests: 1 Max Tests: 19 Mean Tests:
8.928571 . MAD: 0.475539
.[+++].[++].[+++].[++].[++].[++].[+++].[++-].[++].[++++]160 Tested:
168 Avg. Selected: 3.56875 Min Tests: 1 Max Tests: 19 Mean Tests:
9.52381 . MAD: 0.4755886
.[++++].[+++].[++].[+].[+++].[+++].[+++].[++++].[++].[+++]170 Tested:
168 Avg. Selected: 3.576471 Min Tests: 1 Max Tests: 20 Mean Tests:
10.11905 . MAD: 0.4758507
.[+++].[+++].[+++].[++-].[++].[+++].[+++].[+++].[+].[+++]180 Tested:
168 Avg. Selected: 3.577778 Min Tests: 1 Max Tests: 20 Mean Tests:
10.71429 . MAD: 0.4756863
.[+].[++-].[+++].[++].[+++].[+++].[+++].[+++].[++].[++]190 Tested:
168 Avg. Selected: 3.568421 Min Tests: 1 Max Tests: 22 Mean Tests:
11.30952 . MAD: 0.4751899
.[+++].[+].[++].[+].[+].[++].[+++].[++].[+++].[+++]200 Tested: 168
Avg. Selected: 3.545 Min Tests: 1 Max Tests: 23 Mean Tests: 11.90476 .
MAD: 0.4749347
stp <- rcv$survTestPredictions
stp <- stp[!is.na(stp[,4]),]
bbx <- boxplot(unlist(stp[,1])~rownames(stp),plot=FALSE)
times <- bbx$stats[3,]
status <- boxplot(unlist(stp[,2])~rownames(stp),plot=FALSE)$stats[3,]
prob <- ppoisGzero(boxplot(unlist(stp[,4])~rownames(stp),plot=FALSE)$stats[3,],h0)
rdatacv <- cbind(status,prob)
rownames(rdatacv) <- bbx$names
names(times) <- bbx$names
rrAnalysisTest <- RRPlot(rdatacv,atRate=c(0.90),
timetoEvent=times,
title="Test: Lung Cancer",
ysurvlim=c(0.00,1.0),
riskTimeInterval=timeinterval)






Calibrating the test results
rdatacv <- cbind(status,prob,times)
calprob <- CalibrationProbPoissonRisk(rdatacv)
pander::pander(c(h0=calprob$h0,
Gain=calprob$hazardGain,
DeltaTime=calprob$timeInterval),
caption="Cox Calibration Parameters")
timeinterval <- calprob$timeInterval;
rdata <- cbind(status,calprob$prob)
rrAnalysisTest <- RRPlot(rdata,atRate=c(0.90),
timetoEvent=times,
title="Calibrated Test: Lung",
ysurvlim=c(0.00,1.0),
riskTimeInterval=timeinterval)





